*! version 5.0
* 13 August 2018
* NIDS
* Master Expenditure do file for Nids Wave 3

* THIS IS 2nd EXPENDITURE DO FILE - MERGING DATASETS TOGETHER: 2 OF 6
* THIS DO FILE MERGES TOGETHER THE DATASETS REQUIRED FOR CREATING THE EXPENDITURE VARIABLES

*=====================================================================================================================================
* GLOBALS FOR DATA FILES, DO FILES AND VERSION SUFFIXES

* DEFINED IN "Master expenditure do file (1 of 6)"

* Interpret every command below under Stata 12.1 rules, so later Stata releases run this file the same way
version 12.1

*=====================================================================================================================================

* OPENING THE PROXY, CHILD AND ADULT FILES, ADJUSTING WHERE NECESSARY AND APPENDING THE 3 DATASETS
* Each file has its questionnaire prefix (w3_p_, w3_c_, w3_a_) stripped so the three files share
* variable names, and receives a proxy flag (1 = row comes from the proxy file, 0 = otherwise).

* PROXY
use "$DataIN\Proxy_$VersionIN.dta", clear
rename w3_p_* *
generate proxy=1
* Give the proxy employment status and the five income-bracket variables prox_ names
* (presumably so they stay distinct from the adult-file variables after appending - confirm)
rename emactcur prox_emp
foreach x of numlist 1/5 {
	rename em1inc_brac`x' prox_em1inc_s`x'
	}
sort pid
save "$DataOUT\proxy_append.dta", replace

* CHILD
use "$DataIN\Child_$VersionIN.dta", clear
rename w3_c_* *
generate proxy=0
sort pid
save "$DataOUT\child_append.dta", replace

* ADULT (kept in memory as the base that the other two files are appended to)
use "$DataIN\Adult_$VersionIN.dta", clear
rename w3_a_* *
drop relocate
generate proxy=0
sort pid

* APPENDING
append using "$DataOUT\child_append.dta"
append using "$DataOUT\proxy_append.dta"

* Yes/No value label and variable label for the proxy flag
label define dummy 0 "No" 1 "Yes"
label values proxy dummy
label variable proxy "Data from proxy survey"

* Stored sorted by household id; this is the individual-level file the household data are merged onto
sort w3_hhid
save "$DataOUT\acp.dta", replace

* Now bringing in the HH Questionnaire dataset, after doing some cleaning.

use "$DataIN\HHQuestionnaire_$VersionIN.dta", clear

* Strip the household-questionnaire prefix
rename w3_h_* *

* Dropping all HH dead (outcome code 6)
keep if outcome!=6

* Month of the household interview
generate hhintmonth = intrv_m

* HH-level (one-shot) income: values of -3 and below are set to missing
recode tinc (min/-3=.), gen(hhq_inc)
label variable hhq_inc "HH (one shot) income from HH questionnaire"

* Sorting out the one-shot income brackets.
* Each tinc_brac* response is mapped to a rand amount. The statements run in order, so a
* later rule overrides an earlier one when both conditions hold - do not reorder them.
generate hhq_inc_ib=.
replace  hhq_inc_ib=250   if tinc_brac4==3
replace  hhq_inc_ib=500   if tinc_brac4==2
replace  hhq_inc_ib=950   if tinc_brac4==1 & tinc_brac2==3
replace  hhq_inc_ib=1400  if tinc_brac2==2
replace  hhq_inc_ib=2200  if tinc_brac2==1 & tinc_brac1==3
replace  hhq_inc_ib=3000  if tinc_brac1==2
replace  hhq_inc_ib=3750  if tinc_brac1==1 & tinc_brac3==3
replace  hhq_inc_ib=4500  if tinc_brac3==2
replace  hhq_inc_ib=7000  if tinc_brac3==1 & tinc_brac5==3
replace  hhq_inc_ib=9500  if tinc_brac5==2
replace  hhq_inc_ib=16500 if tinc_brac5==1 & tinc_brac6==3
replace  hhq_inc_ib=23500 if tinc_brac6==2
replace  hhq_inc_ib=47000 if tinc_brac6==1

* Combined measure: the reported amount where available, otherwise the bracket amount
generate hhq_incb=hhq_inc
replace  hhq_incb=hhq_inc_ib if hhq_incb==. & hhq_inc_ib!=.
label variable hhq_incb "HH income from HH questionnaire incl brackets and (logical) imps"

*Housing status
* Built from rnt, ownd and ownpaid. The four rules below are mutually exclusive;
* households matching none of them keep the starting value 5 ("Missing").
generate homestatus=5
replace  homestatus=1 if ownd==2 & rnt==1
replace  homestatus=2 if ownd==1 & ownpaid==2
replace  homestatus=3 if ownd==1 & ownpaid==1
replace  homestatus=4 if ownd==2 & rnt==2
label define homestatus 1 "Renting" 2 "Homeowners with mortgage" 3 "Homeowners no mortgage" ///
4 "Don't own or rent" 5 "Missing"
label values homestatus homestatus

* Mortgage indicator: 1 for homeowners with a mortgage, 0 for every other status
* (homestatus is never missing at this point, so the indicator is never missing either)
generate mortgage=(homestatus==2)

*Housing rent
* Amounts of zero or below are set to missing
recode rntpay (min/0=.), gen(rentpay)
* 1 if the household reports renting (rnt==1), else 0
generate rentpay_rec=(rnt==1)
label variable rentpay_rec "Household pays rent"
* 1 if a rent amount is available, else 0
generate rentpay_d=(rentpay!=.)

*Dwelling characteristics
* Pattern used for each characteristic below:
*   1. recode the raw variable into a new variable (codes -9 to -3 become missing);
*   2. create a <name>_d flag equal to 1 when a valid value is present, 0 otherwise;
*   3. replace the missing values with 0, and for the categorical variables add a
*      0 "Missing" entry to the value label that recode created under the new variable's name.

* Dwelling type. Raw code 5 is folded into category 3 and raw code 10 into category 11.
recode dwltyp (-9/-3=.) (1=1 "Dwelling/house or brick structure") (2=2 "Traditional dwelling/hut/structure") ///
(3=3 "Flat or apartment") (4=4 "Town/cluster/semi-detached house") (5=3) (6=6 "Dwelling/house/flat/room in backyard") ///
(7=7 "Informal dwelling/shack in backyard") (8=8 "Informal dwelling/shack not in backyard") (9=9 "Room/flatlet") ///
(10=11) (11=11 "Other"), gen(hometype)
label variable hometype "Type of dwelling"
gen hometype_d=hometype!=.
replace hometype=0 if hometype==.
lab def hometype 0 "Missing", add

* Number of rooms. Values of 35 and above are also treated as missing.
recode dwlrms (-9/-3=.) (35/max=.), gen(homerooms) 
label variable homerooms "Number of rooms in dwelling unit"
gen homerooms_d=homerooms!=.
replace homerooms=0 if homerooms==.
* Squared term, computed after missing values were set to 0
gen homeroomssq=homerooms^2

* Roofing material, collapsed to: 1 bricks/cement, 3 corrugated iron/zinc, 4 tile,
* 5 thatching, 6 asbestos/cement sheeting, 7 other.
recode dwlmatroof (-9/-3=.) (1=1 "Bricks or cement") (2=1) (3=3 "Corrugated iron/zinc") (4/8=7 "Other") ///
(9=4 "Tile") (10=7 "Other") (11=5 "Thatching") (12=6 "Asbestos/cement roof sheeting") (13=7), gen(homeroof)
label variable homeroof "Dwelling unit's roofing material"
gen homeroof_d=homeroof!=.
replace homeroof=0 if homeroof==.
lab def homeroof 0 "Missing", add

* Walling material, collapsed to: 1 bricks, 2 cement block/concrete, 3 corrugated iron/zinc,
* 4 mixture of mud and cement, 5 mud bricks, 6 other.
recode dwlmatrwll (-9/-3=.) (1=1 "Bricks") (2=2 "Cement block/concrete") (3=3 "Corrugated iron/zinc") ///
(7=4 "Mixture of mud and cement") (10=5 "Mud bricks") (4/6=6 "Other") (8/9=6) (11/13=6), gen(homewalls)
* FIX: this label was previously applied to homeroof, which overwrote the roofing label
* and left homewalls without a variable label.
label variable homewalls "Dwelling unit's walling material"
gen homewalls_d=homewalls!=.
replace homewalls=0 if homewalls==.
lab def homewalls 0 "Missing", add

*Rent and ownership variables for imputed rental income stuff
* 1 if ownd==1, else 0
generate ownhome=(ownd==1)
label variable ownhome "Household member owns home"
* 1 if ownpaid==1, else 0
generate ownfullypaid=(ownpaid==1)
label variable ownfullypaid "Property is fully paid off"

* 1 if the roof falls in category 3 of homeroof (corrugated iron/zinc), else 0
generate hrcorr=(homeroof==3)

* Keep only the household-level variables that go into the merge, one row per w3_hhid
keep w3_hhid hhintmonth ///
hometype homerooms homeroomssq homeroof hrcorr homewalls ///
hometype_d homerooms_d homeroof_d homewalls_d ///
hhq_inc hhq_inc_ib hhq_incb tinc tinc_brac* ///
rntpot rentpay rentpay_rec rentpay_d ///
homestatus mortgage ownhome ownfullypaid
sort w3_hhid
save "$DataOUT\hhquestionmerge.dta", replace

* Attach the cleaned household-questionnaire variables to every individual row.
* Unmatched rows from either side are kept; no _merge variable is left behind.
use "$DataOUT\acp.dta", clear
merge m:1 w3_hhid using "$DataOUT\hhquestionmerge.dta", nogenerate

save "$DataOUT\acphhq.dta", replace

* Bringing in the Household Roster Dataset.

use "$DataIN\HouseholdRoster_$VersionIN.dta", clear

* Pull in the household outcome code only to remove outcome-6 households
* (all res CSM's dead), then discard it again
merge m:1 w3_hhid using "$DataIN\HHQuestionnaire_$VersionIN.dta", keepusing(w3_h_outcome) nogenerate
keep if w3_h_outcome!=6
drop w3_h_outcome

* Strip the roster prefix and remove any age / gen variables. Each step is wrapped in
* capture, so a failure (e.g. the variable does not exist) is silently ignored.
capture rename w3_r_* *
capture drop age
capture drop gen

*Dropping all the non-resident members, and all dead respondents
keep if pres==1 & dead!=1

* Best-estimate age and gender come from the individual derived file
merge 1:1 pid using "$DataIN\indderived_$VersionIN.dta", keepusing(w3_best_age_yrs w3_best_gen) nogenerate

*HH size and children in the HH
* Marker: 1 for rows with a pid, reset to 0 where mem==2
tempvar marker
generate `marker'=1 if pid!=.
replace  `marker'=0 if mem==2

* NOTE(review): egen count() counts every non-missing value, so the rows reset to 0
* above (mem==2) are still included in hhsizer - confirm that this is intended.
egen hhsizer=count(`marker'), by(w3_hhid)
label variable hhsizer "Number of household residents"
drop `marker'

* Every row still in the file is treated as a resident member
generate resmem=1

*Resident children in the HH
* Indicators are 1/0 for ages below the cutoff and are left missing for negative ages
foreach cutoff of numlist 15 18 {
	generate ageunder`cutoff'=w3_best_age_yrs<`cutoff' if resmem==1 & w3_best_age_yrs>=0
	}
* Household totals of the indicators
egen hhchildren=total(ageunder15), by(w3_hhid)
egen hhchildren18=total(ageunder18), by(w3_hhid)
label variable hhchildren "Number of household children<15"
label variable hhchildren18 "Number of household children<18"
drop ageunder15 ageunder18

* Gender indicator from best-estimate gender: code 2 becomes 0, codes 3 to 24 become
* missing, every other value is carried over unchanged
recode w3_best_gen (3/24=.) (2=0), gen(roster_male)

* Reduce the roster to the identifiers and the variables needed downstream
keep pid w3_hhid w3_best_age_yrs roster_male marstt edu mem hhsizer hhchildren hhchildren18

* Marital status: codes 1-2 -> 1, codes 3-5 -> 0, codes -9 to -3 -> missing.
* roster_married_d flags rows with a valid status; missing statuses are then set to 0.
recode marstt (1/2=1) (3/5=0) (-9/-3=.), gen(roster_married)
generate roster_married_d=(roster_married!=.)
replace roster_married=0 if roster_married==.
label variable roster_married "Married or living with partner"
rename marstt roster_mstatus

* Remove rows with mem==2 (a further condition, mem==999, was commented out in the original)
keep if mem!=2
drop mem
sort pid

save "$DataOUT\newhhr.dta", replace

* Combine the individual + household-questionnaire file with the cleaned roster
use "$DataOUT\acphhq", clear

merge 1:1 pid w3_hhid using "$DataOUT\newhhr.dta"
* Keep matched rows and roster-only rows; rows found only in the master file are removed
keep if inlist(_merge, 2, 3)
drop _merge
* Remove rows that have no person identifier
keep if pid!=.
save "$DataOUT\temp.dta", replace

*===============================================================================================================================

*Bringing in individual derived variables

* Extract the best-estimate demographic variables, with the w3_ prefix removed
use "$DataIN\indderived_$VersionIN.dta", clear
rename w3_* *
keep pid best_race best_gen best_edu best_dob_m best_dob_y age_intervals
save "$DataOUT\temp2.dta", replace

* Merge them onto the working file by pid; unmatched rows from both sides are retained
use "$DataOUT\temp.dta", clear
merge 1:1 pid using "$DataOUT\temp2.dta", nogenerate
save "$DataOUT\temp3.dta", replace

*===============================================================================================================================

*Bringing in the household derived variables

use "$DataIN\hhderived_$VersionIN.dta", clear
renpfix w3_
* Restore the household id name used as the merge key in the other files
rename hhid w3_hhid

* Province: code -3 and system missing are folded into an explicit category 11
rename prov2011 province
replace province=11 if province==-3
replace province=11 if province==.
* FIX: define the "Missing" category on the value label that is actually attached to
* province. The original defined it under the hard-coded name "province" without the
* add option, which fails if that label already exists and otherwise creates a label
* that is never attached to the variable. If province carries no value label, fall back
* to the original name and attach it.
local provlab : value label province
if "`provlab'"=="" {
	local provlab province
	}
lab def `provlab' 11 "Missing", add
label values province `provlab'

* Geography type: system missing becomes an explicit category 5, labelled the same way
* (falls back to the original label name w2_hhgeo if geo2011 carries no value label)
replace geo2011=5 if geo2011==.
local geolab : value label geo2011
if "`geolab'"=="" {
	local geolab w2_hhgeo
	}
lab def `geolab' 5 "MISSING", add
label values geo2011 `geolab'

keep w3_hhid province geo2011
save "$DataOUT\temp4.dta", replace

* Attach province and geography to each individual; households that appear only in the
* derived file are dropped
use "$DataOUT\temp3.dta", clear
merge m:1 w3_hhid using "$DataOUT\temp4.dta"
drop if _merge == 2
drop _merge
save "$DataOUT\temp5.dta", replace

*===============================================================================================================================

*Bringing in the Link File

use "$DataIN\Link_File_$VersionIN.dta", clear

* Rename the link file's household id (presumably so it does not collide with the
* w3_hhid already in the working file when merging on pid - confirm)
rename w3_hhid hhid

save "$DataOUT\temp6.dta", replace

* Only individuals present in both the working file and the link file are retained
use "$DataOUT\temp5", clear
merge 1:1 pid using "$DataOUT\temp6.dta"
keep if _merge==3
drop _merge


*===============================================================================================================================
*Drop all individuals in HHNR households
* Sanity check first: stop with an error (return code 1) if fewer than 5000 rows carry
* household outcome code 1, since that would suggest the outcome variable is wrong.
count if w3_hh_outcome == 1
local n_outcome1 = r(N)
if `n_outcome1'<5000 {
	display as error "Check outcome"
	exit 1
	}

* Retain only rows with household outcome code 1 (rows with a missing outcome are removed too)
keep if w3_hh_outcome == 1

save "$DataOUT\data.dta", replace

* Remove the intermediate files written by this do file; only data.dta is left in $DataOUT.
* NOTE: the file names are spelled out on purpose. Placing a local macro directly after
* the backslash would stop Stata from expanding it.
erase "$DataOUT\child_append.dta"
erase "$DataOUT\proxy_append.dta"
erase "$DataOUT\acphhq.dta"
erase "$DataOUT\newhhr.dta"
erase "$DataOUT\temp.dta"

* temp2.dta through temp6.dta
foreach q of numlist 2/6 {
	erase "$DataOUT\temp`q'.dta"
	}

erase "$DataOUT\acp.dta"
erase "$DataOUT\hhquestionmerge.dta"

* end of do file
*===================================================================================================================================
*check
